#install.packages("elevatr")
#remotes::install_github("wmgeolab/rgeoboundaries")
#load packages
library(vcfR)
library(ggplot2)
library(adegenet)
library(SNPfiltR)
library(StAMPP)
library(viridis)
library(rgeoboundaries)
library(elevatr)
library(raster)
library(sf)
library(ggpubr)
#check out the details on the filtered vcf
vcfR <- read.vcfR("~/Desktop/mex.towhees/towhee.75.mac2.nomito.vcf")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 4800
## header_line: 4801
## variant count: 5004
## column count: 41
##
Meta line 1000 read in.
Meta line 2000 read in.
Meta line 3000 read in.
Meta line 4000 read in.
Meta line 4800 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 5004
## Character matrix gt cols: 41
## skip: 0
## nrows: 5004
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant: 5004
## All variants processed
vcfR
## ***** Object of Class vcfR *****
## 32 samples
## 2668 CHROMs
## 5,004 variants
## Object size: 7.6 Mb
## 2.643 percent missing data
## ***** ***** *****
#read in sample info csv
sample.info<-read.csv("~/Desktop/mex.towhees/sample.locs.csv")
#make sure sampling file matches vcf
sample.info$ID %in% colnames(vcfR@gt)[-1]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
colnames(vcfR@gt)[-1] %in% sample.info$ID
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
#if needed, subset sampling file to include only samples in vcf
#sample.info<-sample.info[sample.info$id %in% colnames(vcfR@gt)[-1],]
#reorder sampling file to match order of samples in vcf
sample.info<-sample.info[match(colnames(vcfR@gt)[-1], sample.info$ID),]
sample.info$ID == colnames(vcfR@gt)[-1]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
sample.info$sample.loc<-as.character(sample.info$sample.loc)
#add mitochondrial haplotype info to dataframe
sample.info$haplotype<-c(rep("maculatus", times=6),rep("ocai", times=4),
rep("maculatus", times=18),rep("ocai", times=3),"maculatus")
#make dataframe with only the 8 unique sampling locs
unq<-sample.info[!duplicated(sample.info$lat),]
#download elevation data for Mexico
mex_bound <- geoboundaries("Mexico")
elevation_data <- get_elev_raster(locations = mex_bound, z = 6, clip = "locations")
## Registered S3 method overwritten by 'httr':
## method from
## print.cache_info hoardr
## Mosaicing & Projecting
## Clipping DEM to locations
## Note: Elevation units are in meters.
elevation_data <- as.data.frame(elevation_data, xy = TRUE)
colnames(elevation_data)[3] <- "elevation"
# remove rows of data frame with one or more NA's,using complete.cases
elevation_data <- elevation_data[complete.cases(elevation_data), ]
#make subset plotting area corresponding to our sampling
#subset elevation data based on latitude and longitude limits
elevation_data<-elevation_data[elevation_data$x >= -105.5 & elevation_data$x <= -96 &
elevation_data$y >= 16 & elevation_data$y <= 23,]
#subset mexico map outline based on limits
mex_bound <- st_crop(mex_bound, xmin = -105.5, xmax = -96,
ymin = 16, ymax = 23)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
#make custom color-set based on towhee habitat
elevation_data$towhee.hab<-elevation_data$elevation
#color-code inhospitable habitat versus hospitable
elevation_data$towhee.hab<-ifelse(elevation_data$towhee.hab < 1676.4, "unsuitable","marginal")
table(elevation_data$towhee.hab)
##
## marginal unsuitable
## 188945 239253
#color code good versus marginal habitat
good.hab <- elevation_data$elevation > 2133.6
table(replace(elevation_data$towhee.hab, good.hab, "suitable"))
##
## marginal suitable unsuitable
## 104234 84711 239253
elevation_data$towhee.hab<-replace(elevation_data$towhee.hab, good.hab, "suitable")
#plot the map with ggplot
ggplot() +
geom_raster(data = elevation_data, aes(x = x, y = y, fill = elevation)) +
geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=FALSE) +
scale_color_brewer(palette = "RdGy")+
geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
#scale_fill_gradient(low = "white", high = "black")+
scale_fill_gradientn(colours = terrain.colors(50))+
labs(x = "Longitude", y = "Latitude", fill = "Elevation\n(meters)")+
theme_classic()+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
legend.background = element_blank())
#plot the map with ggplot
ggplot() +
geom_raster(data = elevation_data, aes(x = x, y = y, fill = towhee.hab)) +
scale_fill_discrete(type =c("gray","black","white"))+
geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
geom_point(data = unq, aes(x = long, y = lat, color="red"), size=3, show.legend=FALSE) +
#scale_color_brewer(palette = "RdGy")+
#geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 5, color="white")+
labs(x = "Longitude", y = "Latitude", fill = "Elevation")+
theme_classic()+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
legend.background = element_blank())
elev.map<-ggplot() +
geom_raster(data = elevation_data, aes(x = x, y = y, fill = towhee.hab)) +
scale_fill_discrete(type =c("gray","black","white"))+
geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
geom_point(data = unq, aes(x = long, y = lat, color="red"), size=3, show.legend=FALSE) +
#scale_color_brewer(palette = "RdGy")+
#geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 5, color="white")+
labs(x = "Longitude", y = "Latitude", fill = "Elevation")+
theme_classic()+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
legend.background = element_blank())
ggsave(plot = elev.map, filename = "~/Desktop/mex.towhees/sibley.elev.map.pdf", width=8.5, height=5.5)
#plot the map with ggplot
ggplot() +
geom_raster(data = elevation_data, aes(x = x, y = y, fill = elevation)) +
geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=FALSE) +
scale_color_brewer(palette = "PuOr")+
geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
scale_fill_gradient(low = "white", high = "black")+
#scale_fill_gradientn(colours = terrain.colors(50))+
labs(x = "Longitude", y = "Latitude", fill = "Elevation\n(meters)")+
theme_classic()+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
legend.background = element_blank())
ggplot() +
geom_raster(data = elevation_data, aes(x = x, y = y, fill = elevation)) +
geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=FALSE) +
scale_color_brewer(palette = "BrBG")+
geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
scale_fill_gradient(low = "white", high = "black")+
#scale_fill_gradientn(colours = terrain.colors(50))+
labs(x = "Longitude", y = "Latitude", fill = "Elevation\n(meters)")+
theme_classic()+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
legend.background = element_blank())
#make empty map for SNAPP figure
pac<-map_data("world")
#
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="seashell", col="black", cex=.3)+
coord_sf(xlim = c(-105.5, -96), ylim = c(16, 23)) +
geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=TRUE) +
scale_color_brewer(palette = "RdGy")+
theme_classic()+
geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
theme(legend.position = "none")+
xlab("Longitude")+
ylab("Latitude")
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="oldlace", col="black", cex=.3)+
coord_sf(xlim = c(-105.5, -96), ylim = c(16, 23)) +
geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=TRUE) +
scale_color_brewer(palette = "RdGy")+
theme_classic()+
geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
theme(legend.position = "none")+
xlab("Longitude")+
ylab("Latitude")
#morph
morph.pca <- prcomp(sample.info[,c(8:13)], center = TRUE,scale. = TRUE)
sample.info$pc1morph<-morph.pca$x[,1]
sample.info$pc2morph<-morph.pca$x[,2]
ggplot(sample.info, aes(x=pc1morph, y=pc2morph, color=sample.loc)) +
geom_point(cex=4, alpha=.8)+
theme_classic()+
scale_color_brewer(palette = "RdGy")
#plumage
morph.pca <- prcomp(sample.info[,c(16:21)], center = TRUE,scale. = TRUE)
sample.info$pc1plum<-morph.pca$x[,1]
sample.info$pc2plum<-morph.pca$x[,2]
ggplot(sample.info, aes(x=pc1plum, y=pc2plum, color=sample.loc)) +
geom_point(cex=4, alpha=.8)+
theme_classic()+
scale_color_brewer(palette = "RdGy")
summary(morph.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 2.1211 0.8234 0.6099 0.48872 0.37894 0.26199
## Proportion of Variance 0.7498 0.1130 0.0620 0.03981 0.02393 0.01144
## Cumulative Proportion 0.7498 0.8628 0.9248 0.96463 0.98856 1.00000
#plot genetic pca
popm<-data.frame(id=sample.info$ID, pop=sample.info$sample.loc)
gen.pc<-assess_missing_data_pca(vcfR, popmap = popm, clustering = FALSE)
#add genetic PC1 & PC2 to sample info dataframe
sample.info$genpc1<-gen.pc$PC1
sample.info$genpc2<-gen.pc$PC2
#plot genetic PCA
ggplot(sample.info, aes(x=genpc1, y=genpc2, color=sample.loc, shape=haplotype)) +
geom_point(cex=4, alpha=.8)+
theme_classic()+
scale_color_brewer(palette = "RdGy")
#plot genetic PC1 against plumage PC1
ggplot(sample.info, aes(x=genpc1, y=pc1plum, color=sample.loc)) +
geom_point(cex=4, alpha=.8)+
theme_classic()+
scale_color_brewer(palette = "RdGy")
#plot genetic PC1 against morph PC1
ggplot(sample.info, aes(x=genpc1, y=pc1morph, color=sample.loc)) +
geom_point(cex=4, alpha=.8)+
theme_classic()+
#geom_smooth(method=lm)+
scale_color_brewer(palette = "RdGy")
###make Splitstree
#convert to genlight
gen<-vcfR2genlight(vcfR)
pop(gen)<-gen@ind.names
sample.div.mac.75 <- stamppNeisD(gen, pop = FALSE)
#export for splitstree
#stamppPhylip(distance.mat=sample.div.mac.75, file="~/Desktop/mex.towhees/towhee.75.mac.splits.txt")
#75% completeness cutoff with MAC splitstree
knitr::include_graphics(c("/Users/devder/Desktop/mex.towhees/towhee.75.mac.nomito.splitstree.png"))
#add dots color coded by locality in photoshop
#calc pairwise Fst
gen@pop<-as.factor(sample.info$sample.loc)
di.heat<-stamppFst(gen)
m<-di.heat$Fsts
#fill in upper triangle of matrix
m[upper.tri(m)] <- t(m)[upper.tri(m)]
#melt for plotting
heat <- reshape::melt(m)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
heat$X1<-as.character(heat$X1)
heat$X2<-as.character(heat$X2)
#plot with labels
ggplot(data = heat, aes(x=X1, y=X2, fill=value)) +
geom_tile()+
geom_text(data=heat,aes(label=round(value, 2)))+
theme_minimal()+
scale_fill_gradient2(low = "white", high = "red", space = "Lab", name="Fst") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 8 rows containing missing values (geom_text).
#identify the number of fixed differences between pops
mat<-extract.gt(vcfR)
conv.mat<-mat
conv.mat[conv.mat == "0/0"]<-0
conv.mat[conv.mat == "0/1"]<-1
conv.mat[conv.mat == "1/1"]<-2
conv.mat<-as.data.frame(conv.mat)
#convert to numeric
for (i in 1:ncol(conv.mat)){
conv.mat[,i]<-as.numeric(as.character(conv.mat[,i]))
}
#show colnames to verify you're subsetting correctly
colnames(conv.mat) == sample.info$ID
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
#make vector to fill with fixed diff values
f<-c()
#write for loop to calc number of fixed diffs between each pop
for (i in 1:nrow(heat)){
#calc af of pop1 and pop2
pop1.af<-(rowSums(conv.mat[,sample.info$sample.loc == heat$X1[i]], na.rm=T)/(rowSums(is.na(conv.mat[,sample.info$sample.loc == heat$X1[i]]) == FALSE)))/2
pop2.af<-(rowSums(conv.mat[,sample.info$sample.loc == heat$X2[i]], na.rm=T)/(rowSums(is.na(conv.mat[,sample.info$sample.loc == heat$X2[i]]) == FALSE)))/2
#store number of fixed differences
f[i]<-sum(is.na(abs(pop1.af - pop2.af)) == FALSE & abs(pop1.af - pop2.af) == 1) #find fixed SNPs and add to vector
}
#add number of fixed diffs to df
heat$fixed<-f
#fix the assignments
heat$mixed<-heat$value
heat$mixed[c(9,12:15,17,18,20:24,25,29,33,41,44,45,47,49,52,53,57,58,60:63)]<-heat$fixed[c(9,12:15,17,18,20:24,25,29,33,41,44,45,47,49,52,53,57,58,60:63)]
#plot with labels
ggplot(data = heat, aes(x=X1, y=X2, fill=value)) +
geom_tile()+
#scale_x_discrete(limits=levels(heat$X2)[c(1,7,2,3,4,5,6)], labels=c("1:10","12:18","26","11","24:25","19","20:23"))+
#scale_y_discrete(limits=levels(heat$X2)[c(1,7,2,3,4,5,6)], labels=c("1:10","12:18","26","11","24:25","19","20:23"))+
geom_text(data=heat,aes(label=round(mixed, 2)), size=2.5)+
theme_minimal()+
scale_fill_gradient2(low = "white", high = "red", space = "Lab", name="Fst") +
theme(axis.text.x = element_text(vjust=.25, size=12),
axis.text.y = element_text(hjust = -.2, size=12),
axis.title.x = element_blank(), axis.title.y = element_blank())
## Warning: Removed 8 rows containing missing values (geom_text).
tri.data<-read.csv("~/Desktop/mex.towhees/triangle.plot.df.csv")
tri.data$loc<-as.character(tri.data$loc)
ggplot(tri.data, aes(x=hi.index, y=heterozygosity, color=loc)) +
geom_point(cex=6, alpha=.8)+
theme_classic()+
scale_color_brewer(palette = "RdGy")+
geom_abline(intercept = 0, slope = 2)+
geom_abline(intercept = 2, slope = -2)+
ylim(c(0,1))+
xlab("Hybrid Index")+
ylab("Interspecific Heterozygosity")+
theme(legend.position = "none")
#make figure that
#|_A_|_B_|
#|_C_|_D_|
#|_E_|_F_|
#|_G_|_H_|
#where ABCD = sampling map
#E = plumage PCA
#F = genomic PCA
#G = triangle plot
#H = pairwise Fst heatmap
#consider adding admixture plot in photoshop?
samp.map<-ggplot() +
geom_raster(data = elevation_data, aes(x = x, y = y, fill = elevation)) +
geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=FALSE) +
scale_color_brewer(palette = "RdGy")+
geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
#scale_fill_gradient(low = "white", high = "black")+
scale_fill_gradientn(colours = terrain.colors(50))+
labs(x = "Longitude", y = "Latitude", fill = "Elevation\n(meters)")+
theme_classic()+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
legend.background = element_blank())
plum.pca<-ggplot(sample.info, aes(x=pc1plum, y=pc2plum, color=sample.loc)) +
geom_point(cex=4, alpha=.8)+
theme_classic()+
scale_color_brewer(palette = "RdGy")+
theme(legend.position = "none")+
labs(x = "Plumage PC1,75% var. expl.", y = "Plumage PC2, 11.3% var. expl.")
gen.pca<-ggplot(sample.info, aes(x=genpc1, y=genpc2, color=sample.loc, shape=haplotype)) +
geom_point(cex=6, alpha=.8)+
theme_classic()+
scale_color_brewer(palette = "RdGy")+
theme(legend.position = "none")+
labs(x = "Genomic PC1, 7.9% var. expl.", y = "Genomic PC2, 4.5% var. expl.")
fst.plot<-ggplot(data = heat, aes(x=X1, y=X2, fill=value)) +
geom_tile()+
geom_text(data=heat,aes(label=round(mixed, 2)), size=2.5)+
theme_minimal()+
scale_fill_gradient2(low = "white", high = "red", space = "Lab", name="Fst") +
theme(axis.text.x = element_text(vjust=.25, size=12),
axis.text.y = element_text(hjust = -.2, size=12),
axis.title.x = element_blank(), axis.title.y = element_blank())
tri.plot<-ggplot(tri.data, aes(x=hi.index, y=heterozygosity, color=loc)) +
geom_point(cex=6, alpha=.8)+
theme_classic()+
scale_color_brewer(palette = "RdGy")+
geom_abline(intercept = 0, slope = 2)+
geom_abline(intercept = 2, slope = -2)+
ylim(c(0,1))+
xlab("Hybrid Index")+
ylab("Interspecific Heterozygosity")+
theme(legend.position = "none")
#save ggplots
#ggsave("~/Desktop/mex.towhees/elevation.samp.map.pdf", plot=samp.map, width = 6, height = 4.5, units = "in")
#ggsave("~/Desktop/mex.towhees/fst.plot.pdf", plot=fst.plot, width = 3.5, height = 2.6, units = "in")
#ggsave("~/Desktop/mex.towhees/plum.pca.pdf", plot=plum.pca, width = 4, height = 3.5, units = "in")
#ggsave("~/Desktop/mex.towhees/gen.pca.pdf", plot=gen.pca, width = 4, height = 3.5, units = "in")
#ggsave("~/Desktop/mex.towhees/tri.plot.pdf", plot=tri.plot, width = 4, height = 3.5, units = "in")